import numpy as np
import pandas as pd
import os
import re

#import files
# Load every input sheet from the workbook at `path`.
path = r'xxxx'

# Passing a list of sheet names makes pandas open and parse the workbook once
# and return a dict of DataFrames keyed by sheet name, instead of re-reading
# the same file for every sheet.
_sheets = pd.read_excel(
    path,
    sheet_name=[
        'Company-Capacity',
        'Crudesteel-2019pro',
        'DRI-2019pro',
        'Pigiron-2019pro',
        'coke-2019pro',
        'sinter-2019pro',
    ],
)
df_Cap = _sheets['Company-Capacity']
df_CountryCrudesteel = _sheets['Crudesteel-2019pro']
df_CountryDRI = _sheets['DRI-2019pro']
df_CountryPigiron = _sheets['Pigiron-2019pro']
df_CountryCoke = _sheets['coke-2019pro']
df_CountrySinter = _sheets['sinter-2019pro']

# Zero out (rather than delete) the capacity figures of the rows named
# 'zz unidentified plants', so they add nothing to the per-country sums.
_unidentified = df_Cap['Plant Name'] == 'zz unidentified plants'
for _capacity_col in (
    'EAF steel',
    'BOF steel',
    'OHF steel',
    'DRI',
    'total pig iron',
    'BF pig iron',
    'other pig iron',
    'coke',
    'sinter',
):
    df_Cap.loc[_unidentified, _capacity_col] = 0

#Country-level Capacity
# Sum the plant capacities per country. Each result is a Series named after
# the capacity column and indexed by 'Plant Country'.
_capacity_by_country = df_Cap.groupby(['Plant Country'])

# Crude steel, by steelmaking route.
df_CountryCapEAF = _capacity_by_country['EAF steel'].sum()
df_CountryCapBOF = _capacity_by_country['BOF steel'].sum()
df_CountryCapOHF = _capacity_by_country['OHF steel'].sum()

# Iron.
df_CountryCapDRI = _capacity_by_country['DRI'].sum()
df_CountryCapPigiron = _capacity_by_country['total pig iron'].sum()
df_CountryCapBFiron = _capacity_by_country['BF pig iron'].sum()

# Coke and sinter.
df_CountryCapCoke = _capacity_by_country['coke'].sum()
df_CountryCapSinter = _capacity_by_country['sinter'].sum()

#Country-level Output
# From each country-level sheet keep only the join key and the single
# production column used for that product.
_country_key = 'Plant Country'

df_CountryProEAF = df_CountryCrudesteel.loc[:, [_country_key, 'EAF']]
df_CountryProBOF = df_CountryCrudesteel.loc[:, [_country_key, 'BOF']]
df_CountryProOHF = df_CountryCrudesteel.loc[:, [_country_key, 'OHF']]

df_CountryProDRI = df_CountryDRI.loc[:, [_country_key, '2019 DRI production']]
df_CountryProPigiron = df_CountryPigiron.loc[:, [_country_key, '2019 Pigiron production']]

df_CountryProCoke = df_CountryCoke.loc[:, [_country_key, 'coke output (kt)']]
df_CountryProSinter = df_CountrySinter.loc[:, [_country_key, 'sinter output (kt)']]

#Country-level Utilization rate
def _country_operation_rate(capacity, production, capacity_col, production_col, rate_col):
    """Return per-country production together with its operation rate.

    Args:
        capacity: Series of summed capacity, named ``capacity_col`` and
            indexed by 'Plant Country'.
        production: DataFrame holding 'Plant Country' and ``production_col``.
        capacity_col: Name of the capacity column (dropped from the result).
        production_col: Name of the production column.
        rate_col: Name of the column that receives production / capacity.

    Returns:
        DataFrame with 'Plant Country', ``production_col`` and ``rate_col``.
    """
    # Outer join keeps countries that appear on only one side; their missing
    # figure becomes 0 below.
    merged = pd.merge(capacity, production, on='Plant Country', how='outer')
    merged = merged.fillna(0)
    # A zero capacity yields inf (or NaN for 0/0) here; the script replaces
    # those values with 0 after the plant-level outputs are computed.
    merged[rate_col] = merged[production_col] / merged[capacity_col]
    numeric_cols = [production_col, capacity_col, rate_col]
    merged[numeric_cols] = merged[numeric_cols].astype('double')
    # Only the rate and the production are needed downstream.
    return merged.drop(columns=capacity_col)


data_mergeEAF = _country_operation_rate(
    df_CountryCapEAF, df_CountryProEAF, 'EAF steel', 'EAF', 'EAFOperationRate')
data_mergeBOF = _country_operation_rate(
    df_CountryCapBOF, df_CountryProBOF, 'BOF steel', 'BOF', 'BOFOperationRate')
data_mergeOHF = _country_operation_rate(
    df_CountryCapOHF, df_CountryProOHF, 'OHF steel', 'OHF', 'OHFOperationRate')

data_mergeDRI = _country_operation_rate(
    df_CountryCapDRI, df_CountryProDRI, 'DRI', '2019 DRI production', 'DRIOperationRate')
data_mergePigiron = _country_operation_rate(
    df_CountryCapPigiron, df_CountryProPigiron,
    'total pig iron', '2019 Pigiron production', 'PigironOperationRate')

# The original coke stanza divided by the undefined name `data_mergecoke`
# (NameError); going through the shared helper removes that typo.
data_mergeCoke = _country_operation_rate(
    df_CountryCapCoke, df_CountryProCoke, 'coke', 'coke output (kt)', 'CokeOperationRate')
data_mergeSinter = _country_operation_rate(
    df_CountryCapSinter, df_CountryProSinter,
    'sinter', 'sinter output (kt)', 'SinterOperationRate')


#Plant output (without zz unidentified plants)
from functools import reduce

# Plant-level capacities first, then one country-level rate table per product.
df_groups = [df_Cap, data_mergeEAF, data_mergeBOF, data_mergeOHF, data_mergeDRI,
             data_mergePigiron, data_mergeCoke, data_mergeSinter]

# Left joins keep one row per plant in df_Cap's order. An outer join re-orders
# the rows by country and appends countries that have no plants, so its rows
# would not line up with df_Cap when the results are assigned back.
df_mergedplant = reduce(
    lambda left, right: pd.merge(left, right, on='Plant Country', how='left'),
    df_groups,
)
if len(df_mergedplant) != len(df_Cap):
    # Extra rows can only come from a country listed more than once in one of
    # the country-level tables; the outputs would be wrong in that case.
    raise ValueError(
        "Merging duplicated plant rows: a country-level table contains "
        "repeated 'Plant Country' values"
    )
# merge() returns a fresh 0..n-1 index; copy df_Cap's index so the column
# assignments below align row for row.
df_mergedplant.index = df_Cap.index

# Plant output = plant capacity * operation rate of the plant's country.
for _output_col, _capacity_col, _rate_col in (
    ('EAF output (kt)', 'EAF steel', 'EAFOperationRate'),
    ('BOF output (kt)', 'BOF steel', 'BOFOperationRate'),
    ('OHF output (kt)', 'OHF steel', 'OHFOperationRate'),
    ('DRI output (kt)', 'DRI', 'DRIOperationRate'),
    ('total pig iron output (kt)', 'total pig iron', 'PigironOperationRate'),
    ('coke output (kt)', 'coke', 'CokeOperationRate'),
    ('sinter output (kt)', 'sinter', 'SinterOperationRate'),
):
    df_Cap[_output_col] = df_mergedplant[_capacity_col] * df_mergedplant[_rate_col]

# BF pig iron output = BF share of the plant's pig iron capacity * the plant's
# total pig iron output. The original multiplied the share by the operation
# rate only, which gives a dimensionless number instead of kt.
_bf_share = df_mergedplant['BF pig iron'] / df_mergedplant['total pig iron']
df_Cap['BF pig iron output (kt)'] = _bf_share * df_Cap['total pig iron output (kt)']

# Division by a zero capacity leaves inf/NaN behind; report those as 0.
df_Cap.replace([np.inf, -np.inf], np.nan, inplace=True)
df_Cap.fillna(0, inplace=True)

outpath = r'xxx'
# os.path.join uses the platform's separator instead of a hard-coded backslash.
df_Cap.to_csv(os.path.join(outpath, 'Global Steeplant capacity and production.csv'))
